/*******************************************************************************
NETS_Largest_Firm_in_Town_Data.do

This file finds the total firm employment of the firm with the greatest number 
of plants in each industry-location pair for M=3, 6, 12, 24, 48. In cases
where multiple firms tie for greatest number of plants, it uses the average of
their log total firm employments. Additionally for M=12, it repeats this 
exercise, handling ties by either 1) discarding the industry-location pairs for 
which ties occur or 2) using the firm with the highest national employment among
firms that tie. It then repeats these steps again for M=12, 1) using firms with 
at least X=10, 20, 50, 100 plants, 2) using only non-imputed data, and 3) 
recomputing total firm employment to exclude the firm's own contribution in the 
particular location.

Last updated: 5/3/2021
*******************************************************************************/

// Interpret the rest of this file under Stata 15 semantics
version 15

// Session settings (independent of one another)
set varabbrev off    // variable names must be written out in full
set type double      // newly generated variables default to double storage
set more off         // never pause output waiting for a keypress
graph set window fontface "Times New Roman"

// Every relative path below is resolved against this project directory
cd "C:\Plants_in_Space"

/*******************************************************************************
For each M, find the log firm employment of the firm with the most plants in 
each industry-location pair (computing the average log firm employment among 
firms that tie). Additionally for M=12, handle ties using alternative methods.
In the resulting data, each observation is an industry-location pair and the 
variables of interest are log firm employment of the firm with the most plants 
and the location's log population density.
*******************************************************************************/
// Load the cleaned establishment-level data and compute total employment in
// each industry (sic8) once, before looping over M and the tie-handling methods.
use "Data\Intermediate\NETS\NETS_HQs_at_least_5_employees_2014_cleaned.dta", clear
by sic8, sort: egen industry_emp = total(emp)

foreach m of numlist 3 6 12 24 48 {
	foreach method in mean_of max_of discard {
		// The alternative tie-handling methods are only produced for M=12
		if (`m' != 12) & ("`method'" != "mean_of") {
			continue
		}
		preserve
			// Calculate number of establishments in each firm-location pair
			by ID_`m' hq sic8, sort: gen firm_location_num_estab = _N
			
			// Keep unique firm-location pairs. The secondary sort key fixes 
			// which record is retained: Stata orders tied observations 
			// randomly, so without it establishment, emp, and emp_record_code
			// in the saved file could differ from run to run.
			// NOTE(review): assumes establishment is unique within a 
			// firm-location pair -- confirm.
			by ID_`m' hq sic8 (establishment), sort: keep if _n == 1
			
			// Keep the firm with the greatest number of plants in each 
			// industry-location pair (keeping all ties)
			by ID_`m' sic8, sort: egen max_industry_location_num_estab = max(firm_location_num_estab)
			keep if firm_location_num_estab == max_industry_location_num_estab
			drop max_industry_location_num_estab
			
			// Count the firms tied for the most plants in each pair
			by ID_`m' sic8, sort: gen num_of_firms_with_max_num_estab = _N
			
			// Merge with population data; every pair must match a grid cell,
			// and grid cells without any pair are dropped
			merge m:1 ID_`m' using "Data\Intermediate\grid_population_and_employment_data\population_and_employment_data_M`m'.dta", keepusing(pop_density_`m') assert(2 3) keep(3) nogen
			gen log_pop_density_`m' = log(pop_density_`m')
			
			// Three methods to handle ties:
			// 1) discard industry-location pairs where multiple firms are tied
			//    for most plants
			// 2) use the average log firm employment of firms tied with the most
			//    plants in an industry-location pair
			// 3) use the maximum log firm employment among the firms tied with 
			//    the most plants in an industry-location pair
			if "`method'" == "discard" {
				drop if num_of_firms_with_max_num_estab > 1
			}
			else if "`method'" == "mean_of" {
				by ID_`m' sic8, sort: egen avg_log_firm_employment = mean(log_firm_employment)
				// Keep one row per pair. Ordering on hq makes the choice of
				// row (and hence of the other firm-level columns that are
				// saved) the same on every run.
				by ID_`m' sic8 (hq), sort: keep if _n == 1
				drop log_firm_employment
				rename avg_log_firm_employment log_firm_employment
			}
			else {
				// Largest log firm employment first; hq is the final key so
				// that exact ties resolve the same way on every run
				gsort ID_`m' sic8 -firm_location_num_estab -log_firm_employment hq
				by ID_`m' sic8: keep if _n == 1
			}
			
			keep establishment hq sic8 sic2 emp emp_record_code ID_`m' firm_employment ///
				 log_firm_employment industry_emp firm_location_num_estab log_pop_density_`m'
			save "Data\Final\TablesIII_VI\largest_firm_in_town_M`m'_`method'_ties_data.dta", replace
		restore
	}
}


*******************Repeat, using firms with at least X plants*******************
// Since observations in the resulting data are not at the firm level, we need
// to repeat the exercise, first removing the firms we wish to exclude.
// Load the cleaned establishment-level data; each pass of the loop below works
// on a preserved copy restricted to firms with at least X plants.
use "Data\Intermediate\NETS\NETS_HQs_at_least_5_employees_2014_cleaned.dta", clear
foreach X of numlist 10 20 50 100 {
	preserve
		// Keep only firms with at least X plants (firms are matched on hq-sic8;
		// the using file may not contain firms absent from the master data)
		merge m:1 hq sic8 using "Data\Intermediate\NETS\NETS_HQs_at_least_5_employees_2014_hq-sic8_of_firms_with_at_least_`X'_plants.dta", assert(1 3) keep(3) nogen
		
		// Industry employment is computed over the retained firms only
		by sic8, sort: egen industry_emp = total(emp)
	
		// Calculate number of establishments in each firm-location pair
		by ID_12 hq sic8, sort: gen firm_location_num_estab = _N
		
		// Keep unique firm-location pairs. The secondary sort key fixes which
		// record is retained: Stata orders tied observations randomly, so 
		// without it establishment, emp, and emp_record_code in the saved 
		// file could differ from run to run.
		// NOTE(review): assumes establishment is unique within a 
		// firm-location pair -- confirm.
		by ID_12 hq sic8 (establishment), sort: keep if _n == 1
		
		// Keep the firm with the greatest number of plants in each 
		// industry-location pair (keeping all ties)
		by ID_12 sic8, sort: egen max_industry_location_num_estab = max(firm_location_num_estab)
		keep if firm_location_num_estab == max_industry_location_num_estab
		drop max_industry_location_num_estab
		
		// Count the firms tied for the most plants in each pair
		by ID_12 sic8, sort: gen num_of_firms_with_max_num_estab = _N
		
		// Merge with population data; every pair must match a grid cell,
		// and grid cells without any pair are dropped
		merge m:1 ID_12 using "Data\Intermediate\grid_population_and_employment_data\population_and_employment_data_M12.dta", keepusing(pop_density_12) assert(2 3) keep(3) nogen
		gen log_pop_density_12 = log(pop_density_12)

		// Use the average log firm employment of firms tied with the most
		// plants in an industry-location pair. Ordering on hq makes the row 
		// that is kept (and its other firm-level columns) the same on every run.
		by ID_12 sic8, sort: egen avg_log_firm_employment = mean(log_firm_employment)
		by ID_12 sic8 (hq), sort: keep if _n == 1
		drop log_firm_employment
		rename avg_log_firm_employment log_firm_employment
		
		keep establishment hq sic8 sic2 emp emp_record_code ID_12 firm_employment ///
			 log_firm_employment industry_emp firm_location_num_estab log_pop_density_12

		save "Data\Final\TablesIII_VI\largest_firm_in_town_M12_firms_with_at_least_`X'_plants_data.dta", replace
	restore
}


*************************Repeat, using non-imputed data*************************
use "Data\Intermediate\NETS\NETS_HQs_at_least_5_employees_2014_cleaned.dta", clear
	
// Keep only non-imputed data (emp_record_code equal to 0), and redefine total
// firm employment, since the stored values include imputed data in their 
// calculation. A firm is an hq-sic8 combination.
keep if emp_record_code==0
drop firm_employment log_firm_employment
by hq sic8, sort: egen firm_employment = total(emp)
gen log_firm_employment = log(firm_employment)

// Industry employment is likewise computed over non-imputed records only
by sic8, sort: egen industry_emp = total(emp)

// Calculate number of establishments in each firm-location pair
by ID_12 hq sic8, sort: gen firm_location_num_estab = _N

// Keep unique firm-location pairs. The secondary sort key fixes which record is
// retained: Stata orders tied observations randomly, so without it 
// establishment and emp in the saved file could differ from run to run.
// NOTE(review): assumes establishment is unique within a firm-location pair --
// confirm.
by ID_12 hq sic8 (establishment), sort: keep if _n == 1

// Keep the firm with the greatest number of plants in each industry-location 
// pair (keeping all ties)
by ID_12 sic8, sort: egen max_industry_location_num_estab = max(firm_location_num_estab)
keep if firm_location_num_estab == max_industry_location_num_estab
drop max_industry_location_num_estab

// Count the firms tied for the most plants in each pair
by ID_12 sic8, sort: gen num_of_firms_with_max_num_estab = _N

// Merge with population data; every pair must match a grid cell, and grid 
// cells without any pair are dropped
merge m:1 ID_12 using "Data\Intermediate\grid_population_and_employment_data\population_and_employment_data_M12.dta", keepusing(pop_density_12) assert(2 3) keep(3) nogen
gen log_pop_density_12 = log(pop_density_12)

// Use the average log firm employment of firms tied with the most plants in an 
// industry-location pair. Ordering on hq makes the row that is kept (and its
// other firm-level columns) the same on every run.
by ID_12 sic8, sort: egen avg_log_firm_employment = mean(log_firm_employment)
by ID_12 sic8 (hq), sort: keep if _n == 1
drop log_firm_employment
rename avg_log_firm_employment log_firm_employment

keep establishment hq sic8 sic2 emp emp_record_code ID_12 firm_employment ///
	 log_firm_employment industry_emp firm_location_num_estab log_pop_density_12
save "Data\Final\TablesIII_VI\largest_firm_in_town_M12_nonimputed_data.dta", replace


***********************Repeat, excluding own contribution***********************
use "Data\Intermediate\NETS\NETS_HQs_at_least_5_employees_2014_cleaned.dta", clear
by sic8, sort: egen industry_emp = total(emp)

// Recompute total firm employment net of the firm's employment in the particular 
// location. The log is missing whenever the net employment is not positive 
// (for example, when all of the firm's employment is in this location), and 
// those observations are dropped.
by hq sic8 ID_12, sort: egen firm_location_emp = total(emp)
gen firm_emp_net_location = firm_employment - firm_location_emp
gen log_firm_emp_net_location = log(firm_emp_net_location)
drop if missing(log_firm_emp_net_location)

// Calculate number of establishments in each firm-location pair
by ID_12 hq sic8, sort: gen firm_location_num_estab = _N

// Keep unique firm-location pairs. The secondary sort key fixes which record is
// retained: Stata orders tied observations randomly, so without it 
// establishment, emp, and emp_record_code in the saved file could differ from
// run to run.
// NOTE(review): assumes establishment is unique within a firm-location pair --
// confirm.
by ID_12 hq sic8 (establishment), sort: keep if _n == 1

// Keep the firm with the greatest number of plants in each industry-location 
// pair (keeping all ties)
by ID_12 sic8, sort: egen max_industry_location_num_estab = max(firm_location_num_estab)
keep if firm_location_num_estab == max_industry_location_num_estab
drop max_industry_location_num_estab

// Count the firms tied for the most plants in each pair
by ID_12 sic8, sort: gen num_of_firms_with_max_num_estab = _N

// Merge with population data; every pair must match a grid cell, and grid 
// cells without any pair are dropped
merge m:1 ID_12 using "Data\Intermediate\grid_population_and_employment_data\population_and_employment_data_M12.dta", keepusing(pop_density_12) assert(2 3) keep(3) nogen
gen log_pop_density_12 = log(pop_density_12)

// Use the average log net-of-location firm employment of firms tied with the 
// most plants in an industry-location pair. Ordering on hq makes the row that 
// is kept (and its other firm-level columns, which are not averaged) the same
// on every run.
by ID_12 sic8, sort: egen avg_log_firm_emp_net_location = mean(log_firm_emp_net_location)
by ID_12 sic8 (hq), sort: keep if _n == 1
drop log_firm_emp_net_location
rename avg_log_firm_emp_net_location log_firm_emp_net_location

keep establishment hq sic8 sic2 emp emp_record_code ID_12 firm_employment ///
	 log_firm_employment industry_emp firm_location_num_estab log_pop_density_12 ///
	 firm_emp_net_location log_firm_emp_net_location
save "Data\Final\TablesIII_VI\largest_firm_in_town_M12_excluding_own_contribution_data.dta", replace
